* Start from a clean session (drops the data and other objects held in memory)
clear all
* Do not pause for --more-- prompts, so the script runs unattended
set more off
* NOTE(review): explicit memory allocation is only needed by older Stata releases;
* newer releases manage memory automatically -- confirm the target Stata version
set mem 10000000
* Raise the maximum matrix size
set matsize 10000

****************************************************************** 
*** Census Panel Merge (all 6 Census datasets)  ******************
****************************************************************** 

** Set file paths
** ($path_code must already be defined by the caller; paths.do presumably defines
**  the $pca, $hpca01, $hpca11, $vd01 and $vd11 globals used below -- verify)
do "$path_code/paths.do"

********************************************************************************
********************************************************************************

** Step 1: Merge all 6 Census datasets, only on identifiers for now 
       // (bring in outcomes later, after nailing down this merge)
       // Merge order: 2001 PCA -> 2001 HPCA -> 2001 VD -> 2011 PCA -> 2011 HPCA -> 2011 VD.
       // The HPCA01 merge keeps unmatched master rows (keep(1 3)); every later merge
       // keeps matched rows only (keep(3)), so the panel shrinks at each of those steps.
       // NOTE(review): four of the merges below are m:m. An m:m merge pairs rows within
       // each key group by their order in the data rather than forming a proper join.
       // If the keys are meant to identify one village each, 1:1 (or m:1) would state
       // that assumption and fail loudly on duplicates -- confirm key uniqueness.
{

  // Start with 2001 PCA
use "$pca/pca_census01.dta", clear
rename bk_code_pca bk_code
keep pca01_id - tot_f

  // Merge with 2001 HPCA
  // (block-level file, hence m:1 on state/district/block)
merge m:1 st_code dt_code bk_code using "$hpca01/hpca01_blockwise.dta", keep(1 3) nogen
      // an incredibly clean merge, all the block names match within 1 spelling irregularity
      // HPCA01 outcomes are dropped here and re-attached in Step 3
drop hpca01*

  // Merge with 2001 VD
merge m:m st_code dt_code vi_code using "$vd01/vd_2001.dta", keep(3) nogen
  // flag villages whose VD population differs from the PCA population
gen pop2001_mismatch = t_p_vd!=tot_p
     // Meghalaya is a mess in this merge, so good thing it's only 180 villages in our RD sample... 
     // Everything else is fine, except ~2% of villages in Bihar...
drop area_01 edu_fac_01 - area_na_cu_01

  // Merge with 2011 PCA
merge m:m st_code dt_code vi_code using "$pca/pca_census11.dta", keep(3) nogen
drop p_06_11-marg_ot_f_03_11	
      // match rates over 96% for all states except 2 (Sikkim and Kerala)

  // Merge with 2011 HPCA
merge m:m st_code11 dt_code11 bk_code11 vi_code11 using "$hpca11/hpca11.dta", keep(3) nogen
  // HPCA11 outcomes are dropped here and re-attached in Step 3
drop hpca11*
      // fairly clean merge, even using bk_code11. we lose 979 villages from the master dataset,
			// and there are 8911 extra villages in the using (3591 of which are in UP, Assam, Meghalaya)

  // Merge with 2011 VD
merge m:m st_code11 dt_code11 vi_code11 using "$vd11/vd_2011.dta", keep(3) nogen
*gen pop2011_mismatch = t_p_vd11!=tot_p11 // all populations seem to match
drop panch_code11-dist_town_11


  // Disabled diagnostics. To re-enable them, the merge of interest must keep _merge
  // (drop its nogen option and widen keep()), since _merge is discarded above.
*egen match_state = max(_merge), by(st_code11)
*tab st_code11 _merge if match_state==3
	
}	



********************************************************************************
********************************************************************************

** Step 2: Bring in PCA 2001 & 2011 Outcomes (common to both datasets)
{
  // Attach the outcome columns from each PCA file; only matched rows are kept
merge m:1 pca01_id using "$pca/pca_census01.dta", keepusing(pct_06_01-work_marg_ot_f_01) keep(match) nogenerate
merge m:1 pca11_id conc_id using "$pca/pca_census11.dta", keepusing(pct_06_11-work_marg_ot_f_11) keep(match) nogenerate

  // Sanity check: every PCA01 outcome must have a PCA11 counterpart
  // (summarize aborts the script if the 2011 name does not exist)
foreach pre of varlist pct_06_01-work_marg_ot_f_01 {
	local stem = substr("`pre'", 1, length("`pre'") - 3)
	local post = "`stem'" + "_11"
	quietly summarize `post'
}
}


********************************************************************************
********************************************************************************

** Step 3: Bring in HPCA 2001 & 2011 Outcomes (adjusted and redefined to have matching pre/post outcomes)
{
merge m:1 st_code dt_code bk_code using "$hpca01/hpca01_blockwise.dta", keepusing(hpca01*) keep(match) nogenerate
merge m:1 v_id_hpca11 using "$hpca11/hpca11.dta", keepusing(hpca11*) keep(match) nogenerate

  // Build the 7 combined 2011 measures that correspond to otherwise unmatched HPCA01
  // variables, and give each one the label used for its 2001 counterpart
generate hpca11_lat_pit_svi_sop = hpca11_lat_pit_svi + hpca11_lat_pit_sop
label variable hpca11_lat_pit_svi_sop "% HH with slab/ventilated or open pit latrine"

generate hpca11_bath_bath_ewr = hpca11_bath + hpca11_bath_ewr
label variable hpca11_bath_bath_ewr "% HH with bathing room available (with or without roof)"

generate hpca11_assets_tel_l_m_b = hpca11_assets_tel_l + hpca11_assets_tel_m + hpca11_assets_tel_b
label variable hpca11_assets_tel_l_m_b "% HH with phone (landline and/or mobile)"

generate hpca11_mat_r_hmt_mmt = hpca11_mat_r_hmt + hpca11_mat_r_mmt
label variable hpca11_mat_r_hmt_mmt "% HH with roof made of hand-made or machine-made tile"

generate hpca11_mat_w_snpwm_spwm = hpca11_mat_w_snpwm + hpca11_mat_w_spwm
label variable hpca11_mat_w_snpwm_spwm "% HH with walls made of stone (packed or not packed with mortar)"

generate hpca11_dw_tfts_tfus = hpca11_dw_tfts + hpca11_dw_tfus
label variable hpca11_dw_tfts_tfus "% HH with tap water from treated or untreated source"

generate hpca11_dw_cw_uw = hpca11_dw_cw + hpca11_dw_uw
label variable hpca11_dw_cw_uw "% HH with tap water from covered or uncovered well"


  // Sanity check: every HPCA01 variable must have an HPCA11 counterpart
  // (summarize aborts the script if the 2011 name does not exist)
foreach pre of varlist hpca01_* {
	local post = subinstr("`pre'", "hpca01", "hpca11", 1)
	quietly summarize `post'
}

  // Any HPCA11 variable without an HPCA01 counterpart is post-only: move it to the "hpca11p" prefix
foreach post of varlist hpca11_* {
	local pre = subinstr("`post'", "hpca11", "hpca01", 1)
	capture summarize `pre'
	if _rc {
		rename `post' `=subinstr("`post'", "hpca11", "hpca11p", 1)'
	}
}

  // These listed 2011 variables are also moved to the post-only prefix
local components hpca11_lat_pit_svi hpca11_assets_tel_l hpca11_mat_r_hmt hpca11_mat_w_snpwm hpca11_dw_tfts hpca11_dw_cw
foreach post of varlist `components' {
	rename `post' `=subinstr("`post'", "hpca11", "hpca11p", 1)'
}

  // Discard the post-only HPCA variables that are not used downstream
drop hpca11p_mat_* hpca11p_dw_* hpca11p_lat_* hpca11p_no_lat_* hpca11p_bath* hpca11p_cook_*
}


********************************************************************************
********************************************************************************

** Step 4: Bring in VD 2001 & 2011 Outcomes (adjusted and redefined to have matching pre/post outcomes)
{
merge m:1 vd01_id using "$vd01/vd_2001.dta", nogen keep(3) 
merge m:1 vd11_id using "$vd11/vd_2011.dta", nogen keep(3)
drop panch_code11-panchayat_11 subdist_hq_11-near_st_town_dist_11 near_town*
rename ref_year_11 vd_ref_year_11

  // Tag VD variable names by group
{
	// Column ranges (in dataset order) belonging to each thematic group. Each range is
	// only expanded when its own loop starts, i.e. after the earlier groups were renamed.
local grp_edu edu_fac_01-oth_sch_01 pp_sch_g_11-rang_oth_sch_11
local grp_hea medi_fac_01-oth_cntr_01 ch_cntr_11-ng_med_oth_11
local grp_wat drnk_wat_f_01-ss_code_01 tap_tr_11-other_sum_11
local grp_san drain_cl_11-com_no_waste_disp_11
local grp_com p_t_fac_01-comm_fac_01 pap_mag_01-magazine_01 post_off_fac_11-rang_courier_11 news_pap_fac_11-rang_news_pap_11
local grp_tra bs_fac_01-rang_nw_01 app_pr_01-app_nw_01 bs_pri_fac_11-rang_footpath_11
local grp_fin bank_fac_01-rang_oth_01 atm_fac_11-rang_acs_11
local grp_ame rc_fac_01-rang_stau_01 shg_fac_11-rang_read_room_11 assemb_ps_fac_11-rang_bdro_11

	// edu = education, hea = public health, wat = water, san = sanitation,
	// com = communication, tra = transportation, fin = financial, ame = amenities
foreach tag in edu hea wat san com tra fin ame {
	foreach v of varlist `grp_`tag'' {
		rename `v' `tag'_`v'
	}
}

	// Power variables already carry a "power_" prefix: shorten it to "pwr_"
foreach v of varlist power* {
	rename `v' `=subinstr("`v'", "power_", "pwr_", 1)'
}

	// Geographic variables get the "geo_" prefix
foreach v of varlist land_fores_01-area_na_cu_01 area_fores_11-area_irr_oth_11 dist_town* area_01 area_11 {
	rename `v' geo_`v'
}
}

  // Construct matching pre/post variables, where possible
{	    
			// Education variables
			{
				// 2011 splits each school type into _g and _p columns (presumably
				// government/private -- confirm); add them to mirror the 2001 totals
				foreach lvl in p m s s_s {
					generate edu_`lvl'_sch_11 = edu_`lvl'_sch_g_11 + edu_`lvl'_sch_p_11
				}

				// Colleges: harmonize the 2001 name, then total and range for 2011
				rename edu_college_01 edu_coll_01
				egen edu_coll_11 = rowtotal(edu_college_*_g_11 edu_college_*_p_11)
				egen edu_rang_coll_11 = rowmin(edu_rang_college*_11)
				// a village that has a college is at range 0
				replace edu_rang_coll_11 = 0 if edu_coll_11>0 & edu_coll_11!=.

				egen edu_tr_sch_11 = rowtotal(edu_tr_sch_*_g_11 edu_tr_sch_*_p_11)
				generate edu_oth_sch_11 = edu_oth_sch_g_11 + edu_oth_sch_p_11

				// Any-facility flag: 1 when at least one of the 2011 counts is positive
				rename edu_edu_fac_01 edu_fac_01
				egen edu_fac_11 = rowtotal(edu_p_sch_11 edu_m_sch_11 edu_s_sch_11 edu_s_s_sch_11 edu_coll_11 edu_tr_sch_11 edu_oth_sch_11)
				replace edu_fac_11 = (edu_fac_11 > 0)

				label variable edu_p_sch_11 "Number of primary schools"
				label variable edu_m_sch_11 "Number of middle schools"
				label variable edu_s_sch_11 "Number of secondary schools"
				label variable edu_s_s_sch_11 "Number of senior secondary schools"
				label variable edu_coll_11 "Number of colleges"
				label variable edu_rang_coll_11 "Range to nearest college (1 = <5km, 2 = 5-10km, 3 = >10km)"
				label variable edu_tr_sch_11 "Number of training schools"
				label variable edu_oth_sch_11 "Number of other educational facilities"
				label variable edu_fac_11 "Education facilities (Y/N)"
			}
			
			// Public health variables
			{
				// 2001 dispensaries and alternative-medicine hospitals are summed across their sub-types
				egen hea_disp_01 = rowtotal(hea_all_disp_01 hea_ayu_disp_01 hea_hom_disp_01 hea_un_disp_01)
				egen hea_alt_hosp_01 = rowtotal(hea_ayu_hosp_01 hea_hom_hosp_01 hea_un_hosp_01)
				rename hea_fwc_cntr_01 hea_fw_cntr_01

				// 2011 any-facility flag: 1 when at least one listed facility count is positive
				local facilities hea_ch_cntr_11 hea_ph_cntr_11 hea_phs_cnt_11 hea_mcw_cntr_11 hea_tb_cln_11 hea_all_hosp_11 hea_alt_hosp_11 hea_disp_11 hea_mh_clin_11 hea_fw_cntr_11 hea_ng_med_op_11 hea_ng_med_iop_11 hea_ng_med_char_11 hea_ng_med_oth_11 hea_ng_med_ms_11 hea_ng_med_tp_fh_11 hea_ng_med_deg_none_11 hea_ng_med_deg_oth_11 hea_ng_med_deg_mbbs_11
				egen hea_medi_fac_11 = rowtotal(`facilities')
				replace hea_medi_fac_11 = (hea_medi_fac_11 > 0)

				// Give the 2001 and 2011 columns the same, more descriptive names
				foreach yr in 01 11 {
					rename hea_phs_cnt_`yr' hea_ph_subcntr_`yr'
					rename hea_rang_all_`yr' hea_rang_all_hosp_`yr'
					rename hea_rang_mcw_`yr' hea_rang_mcw_cntr_`yr'
					rename hea_rang_phc_`yr' hea_rang_ph_cntr_`yr'
				}

				label variable hea_disp_01 "Number of dispensaries"
				label variable hea_alt_hosp_01 "Number of alternative medicine hospitals"
				label variable hea_medi_fac_11 "Medical facilities (Y/N)"
			}

			// Water variables
			{
				// Combine source types so that 2001 and 2011 use the same categories
				egen wat_river_canal_01 = rowmax(wat_canal_01 wat_river_01)
				egen wat_tank_lake_01 = rowmax(wat_tank_01 wat_lake_01)
				egen wat_tap_11 = rowmax(wat_tap_tr_11 wat_tap_untr_11)
				egen wat_well_11 = rowmax(wat_well_cov_11 wat_well_uncov_11)
				rename wat_drnk_wat_f_01 wat_any_fac_01
				egen wat_any_fac_11 = rowmax(wat_tap_11 wat_well_11 wat_handpump_11 wat_tubewell_11 wat_spring_11 wat_tank_lake_11 wat_river_canal_11 wat_other_11)

				// 2001 summer sources come as one comma-separated string of codes
				// (T, W, HP, TW, S, R, C, TK, L, O as used below); strip blanks first.
				replace wat_sou_summ_01 = subinstr(wat_sou_summ_01," ","",.)

				// T and W are matched as whole tokens by wrapping the list in commas.
				// The previous test ("W," anywhere, or a trailing "W") also fired on the
				// tubewell code "TW", so tubewell-only villages were flagged as having wells.
				gen wat_tap_sum_01 = (strpos("," + wat_sou_summ_01 + ",", ",T,") > 0)
				gen wat_well_sum_01 = (strpos("," + wat_sou_summ_01 + ",", ",W,") > 0)

				// The remaining codes are matched as substrings.
				// NOTE(review): this assumes no other code contains S, R, C, L or O -- confirm against the codebook
				gen wat_handpump_sum_01 = (regexm(wat_sou_summ_01,"HP"))
				gen wat_tubewell_sum_01 = (regexm(wat_sou_summ_01,"TW"))
				gen wat_spring_sum_01 = (regexm(wat_sou_summ_01,"S"))
				gen wat_river_canal_sum_01 = (regexm(wat_sou_summ_01,"R") | regexm(wat_sou_summ_01,"C"))
				gen wat_tank_lake_sum_01 = (regexm(wat_sou_summ_01,"TK") | regexm(wat_sou_summ_01,"L"))
				gen wat_other_sum_01 = (regexm(wat_sou_summ_01,"O"))

				// 2011 summer flags: combine treated/untreated taps and covered/uncovered wells
				egen wat_tap_sum_11 = rowmax(wat_tap_tr_sum_11 wat_tap_untr_sum_11)
				egen wat_well_sum_11 = rowmax(wat_well_cov_sum_11 wat_well_uncov_sum_11)

				la var wat_river_canal_01 "River/canal water (Y/N)"
				la var wat_tank_lake_01 "Tank/pond/lake water (Y/N)"
				la var wat_tap_11 "Tap water, treated or untreated (Y/N)"
				la var wat_well_11 "Well water, covered or uncovered (Y/N)"
				la var wat_any_fac_11 "Drinking water facilities (Y/N)"
				la var wat_tap_sum_01 "Tap water, summer (Y/N)"
				la var wat_well_sum_01 "Well water, summer (Y/N)"
				la var wat_handpump_sum_01 "Handpump water, summer (Y/N)"
				la var wat_tubewell_sum_01 "Tubewell water, summer (Y/N)"
				la var wat_spring_sum_01 "Spring water, summer (Y/N)"
				la var wat_river_canal_sum_01 "River/canal water, summer (Y/N)"
				la var wat_tank_lake_sum_01 "Tank/lake water, summer (Y/N)"
				la var wat_other_sum_01 "Other water source, summer (Y/N)"
				la var wat_tap_sum_11 "Tap water, summer (Y/N)"
				la var wat_well_sum_11 "Well water, summer (Y/N)"
			}

			// Communication variables
			{
				// 2001 columns hold counts; derive 0/1 flags (any value >= 1 becomes 1,
				// everything else, including missing, is carried over unchanged)
				generate com_post_off_fac_01 = cond(com_post_off_01>=1 & com_post_off_01!=., 1, com_post_off_01)
				foreach yr in 01 11 {
					rename com_rang_po_`yr' com_rang_post_off_`yr'
				}
				generate com_post_tele_fac_01 = cond(com_post_tele_01>=1 & com_post_tele_01!=., 1, com_post_tele_01)
				generate com_phone_ll_01 = cond(com_phone_01>=1 & com_phone_01!=., 1, com_phone_01)
				rename com_rang_phone_01 com_rang_phone_ll_01

				// Newspapers: align names across years; a missing 2001 value is recoded to 0
				rename com_news_pap_01 com_newspap_fac_01
				rename com_news_pap_fac_11 com_newspap_fac_11
				replace com_newspap_fac_01 = 0 if com_newspap_fac_01==.

				label variable com_post_off_fac_01 "Post office (Y/N)"
				label variable com_post_tele_fac_01 "Post and telegraph facilities (Y/N)"
				label variable com_phone_ll_01 "Telephone (landlines) (Y/N)"
				label variable com_rang_phone_ll_01 "Range to nearest telephone (landline) (1 = <5km, 2 = 5-10km, 3 = >10km)"
			}
			
			// Transportation variables
			{
				// 2011 has separate _pri and _pub bus columns (presumably private/public --
				// confirm); collapse them into one flag and one range
				egen tra_bs_fac_11 = rowmax(tra_bs_pri_fac_11 tra_bs_pub_fac_11)
				egen tra_rang_bs_11 = rowmin(tra_rang_bs_pri_11 tra_rang_bs_pub_11)
				// a village with bus service is at range 0
				replace tra_rang_bs_11 = 0 if tra_bs_fac_11>0 & tra_bs_fac_11!=.

				// Spell out the abbreviated names (nw -> navig, bs -> bus, rs -> rail)
				rename tra_nw_fac_01 tra_navig_fac_01
				rename tra_rang_nw_01 tra_rang_navig_01
				foreach yr in 01 11 {
					rename tra_bs_fac_`yr' tra_bus_fac_`yr'
					rename tra_rang_bs_`yr' tra_rang_bus_`yr'
					rename tra_rs_fac_`yr' tra_rail_fac_`yr'
					rename tra_rang_rs_`yr' tra_rang_rail_`yr'
				}

				label variable tra_bus_fac_11 "Bus service (Y/N)"
				label variable tra_rang_bus_11 "Range to nearest bus service (1 = <5km, 2 = 5-10km, 3 = >10km)"
			}
			
			// Financial variables
			{ 
				// 2001 columns hold counts; derive 0/1 flags (any value >= 1 becomes 1)
				gen fin_comm_bank_fac_01 = fin_comm_bank_01
				replace fin_comm_bank_fac_01 = 1 if fin_comm_bank_fac_01>=1 & fin_comm_bank_fac_01!=.
				rename fin_rang_comm_01 fin_rang_comm_bank_01
				rename fin_rang_comm_11 fin_rang_comm_bank_11
				gen fin_coop_bank_fac_01 = fin_coop_bank_01
				replace fin_coop_bank_fac_01 = 1 if fin_coop_bank_fac_01>=1 & fin_coop_bank_fac_01!=.
				rename fin_rang_coop_01 fin_rang_coop_bank_01
				rename fin_rang_coop_11 fin_rang_coop_bank_11
				gen fin_ac_soc_fac_01 = fin_ac_soc_01
				replace fin_ac_soc_fac_01 = 1 if fin_ac_soc_fac_01>=1 & fin_ac_soc_fac_01!=.
				// anything that is still not 0/1 (negative or fractional) is treated as invalid
				replace fin_ac_soc_fac_01 = . if fin_ac_soc_fac_01!=0 & fin_ac_soc_fac_01!=1 & fin_ac_soc_fac_01!=.
				rename fin_rang_acs_01 fin_rang_ac_soc_01
				rename fin_rang_acs_11 fin_rang_ac_soc_11
				egen fin_bank_fac_11 = rowmax(fin_comm_bank_fac_11 fin_coop_bank_fac_11)

				la var fin_comm_bank_fac_01 "Commercial bank (Y/N)"
				la var fin_coop_bank_fac_01 "Cooperative bank (Y/N)"
				// label typo fixed ("cerdit" -> "credit")
				la var fin_ac_soc_fac_01 "Agricultural credit societies (Y/N)"
				la var fin_bank_fac_11 "Banking facilities (Y/N)"
				
				// Range to nearest ATM: 0 = in village, 1 = <5km, 2 = 5-10km, 3 = >10km.
				// The raw distance column mixes letter codes (A/B/C) with numeric km values.
				gen fin_rang_atm_11 = .
				replace fin_rang_atm_11 = 0 if fin_atm_fac_11==1
				replace fin_rang_atm_11 = 1 if fin_atm_fac_11==0 & fin_dist_atm_km_11=="A"
				replace fin_rang_atm_11 = 2 if fin_atm_fac_11==0 & fin_dist_atm_km_11=="B"
				replace fin_rang_atm_11 = 3 if fin_atm_fac_11==0 & fin_dist_atm_km_11=="C"
				// letter codes become missing here; only numeric distances survive
				destring fin_dist_atm_km_11, force replace
				// Numeric distances are binned into the same codes. The 5-10km and >10km
				// bins previously both received code 1, contradicting the variable label.
				// NOTE(review): unlike the letter codes, these are not restricted to
				// fin_atm_fac_11==0, so a positive distance overrides the in-village 0 -- confirm
				replace fin_rang_atm_11 = 1 if fin_dist_atm_km_11>0 & fin_dist_atm_km_11<=5
				replace fin_rang_atm_11 = 2 if fin_dist_atm_km_11>5 & fin_dist_atm_km_11<=10
				replace fin_rang_atm_11 = 3 if fin_dist_atm_km_11>10 & fin_dist_atm_km_11!=.
				la var fin_rang_atm_11 "Range to nearest ATM (1 = <5km, 2 = 5-10km, 3 = >10km)"
				drop fin_dist_atm_km_11
			}
			
			// Amenities variables
			{ 
				// Cinema/video halls: 2001 holds a count; derive a 0/1 flag (any value >= 1 becomes 1)
				gen ame_cv_hall_fac_01 = ame_c_v_hall_01
				replace ame_cv_hall_fac_01 = 1 if ame_cv_hall_fac_01>=1 & ame_cv_hall_fac_01!=.
				rename ame_c_v_hall_fac_11 ame_cv_hall_fac_11
				rename ame_rang_cv_01 ame_rang_cv_hall_01
				rename ame_rang_cv_11 ame_rang_cv_hall_11

				// Sports clubs: collapse the 2001 value to 0/1.
				// Missing compares greater than any number in Stata, so an unguarded
				// "x>0" would turn missing into 1; missing values are left as missing.
				rename ame_sp_cl_fac_11 ame_spcl_fac_11
				replace ame_sp_cl_fac_01 = ame_sp_cl_fac_01>0 if !missing(ame_sp_cl_fac_01)
				rename ame_sp_cl_fac_01 ame_spcl_fac_01

				la var ame_cv_hall_fac_01 "Cinema, video hall (Y/N)"
				la var ame_spcl_fac_01 "Sports club, rec center (Y/N)"
			}
			
			// Power variables
			{
				// 2001: electricity "for all uses" implies each specific use
				foreach use in dom agr oth {
					replace pwr_`use'_01 = 1 if pwr_all_01==1
				}
				egen pwr_any_01 = rowmax(pwr_dom_01 pwr_agr_01 pwr_oth_01 pwr_all_01)
				// fall back on the general supply flag when no specific use is recorded
				replace pwr_any_01 = pwr_supl_01 if pwr_any_01==.
				drop pwr_supl_01
				// a village with no electricity at all has 0 (not missing) for every use
				foreach use in all dom agr oth {
					replace pwr_`use'_01 = 0 if pwr_`use'_01==. & pwr_any_01==0
				}

				// 2011: same logic, with "com" in place of "oth"
				foreach use in dom agr com {
					replace pwr_`use'_11 = 1 if pwr_all_11==1
				}
				egen pwr_any_11 = rowmax(pwr_dom_11 pwr_agr_11 pwr_com_11 pwr_all_11)
				foreach use in all dom agr com {
					replace pwr_`use'_11 = 0 if pwr_`use'_11==. & pwr_any_11==0
				}

				label variable pwr_dom_01 "Electricity for domestic use (Y/N)"
				label variable pwr_agr_01 "Electricity for agricultural use (Y/N)"
				label variable pwr_oth_01 "Electricity for other uses (Y/N)"
				label variable pwr_all_01 "Electricity for all uses (Y/N)"
				label variable pwr_any_01 "Electricity for any uses (Y/N)"
				label variable pwr_any_11 "Electricity for any uses (Y/N)"
			}
			
			// Geographic variables
			// Harmonizes land-use / irrigation column names across 2001 and 2011 and
			// aggregates the 2001 irrigation sources into the categories named below.
			{ 
				// shorten the 2011 names and bring the 2001 names onto the same "area_" pattern
				rename geo_area_irrigated_11 geo_area_irr_11
				rename geo_area_unirrigated_11 geo_area_unirr_11
				rename geo_tot_irr_01 geo_area_irr_01
				rename geo_un_irr_01 geo_area_unirr_01
				rename geo_w_fall_01 geo_area_irr_wfall_01
				rename geo_area_irr_waterfall_11 geo_area_irr_wfall_11
				// 2001 aggregates; rowtotal() treats missing components as 0
				egen geo_area_irr_w_twell_01 = rowtotal(geo_well_*_01 geo_tw_*_01)
				egen geo_area_irr_tank_lake_01 = rowtotal(geo_tank_irr_01 geo_lake_irr_01)
				egen geo_area_irr_canal_01 = rowtotal(geo_canal_*_01)
				rename geo_land_fores_01 geo_area_fores_01
				rename geo_cult_waste_01 geo_area_cult_waste_01
				rename geo_oth_irr_01 geo_area_irr_oth_01
				// fold river irrigation into "other" for 2001
				// NOTE(review): plain addition gives missing whenever either term is missing,
				// unlike the rowtotal() aggregates above -- confirm this is intended
				replace geo_area_irr_oth_01 = geo_area_irr_oth_01 + geo_river_irr_01
				
				la var geo_area_irr_w_twell_01 "Area irrigated by wells/tube wells (hectares)"
				la var geo_area_irr_tank_lake_01 "Area irrigated by tanks/lakes (hectares)"
				la var geo_area_irr_canal_01 "Area irrigated by canals (hectares)"				
				la var geo_area_irr_oth_01 "Area irrigated by other source, incl river (hectares)"				
				la var geo_area_irr_oth_11 "Area irrigated by other source, incl river (hectares)"				
			} 
}
 
	// Tag matching pre/post variables with the prefix "vd_"
{
foreach pre of varlist geo_area_01-geo_area_irr_canal_01 {
	// only names ending in "_01" can start a pre/post pair
	if substr("`pre'",-3,3)!="_01" continue
	local post = subinstr("`pre'","_01","_11",1)
	// capture: when the 2011 counterpart does not exist, summarize fails and
	// neither variable is renamed
	capture {
		summarize `post'
		rename `pre' vd_`pre'
		rename `post' vd_`post'
	}
}
}

  // Define (or tag) pre- or post-only variables of interest (with prefixes "vdpr_" or "vdp_")
  // vdp_ marks 2011-only variables, vdpr_ marks 2001-only variables
{
			// Education variables
			{
				generate vdp_edu_pp_sch_11 = edu_pp_sch_g_11 + edu_pp_sch_p_11
				label variable vdp_edu_pp_sch_11 "Number of pre-primary schools (nursery/LKG/UKG)"
			}

			// Public health variables
			{
				// staff totals summed over every health column matching each suffix
				foreach staff in dr_ts dr_ip ps_ts ps_ip {
					egen vdp_hea_`staff'_11 = rowtotal(hea_*_`staff'_11)
				}
				// flag: 1 when the hea_ng_med_* columns sum to a positive value
				egen vdp_hea_ng_med_fac_11 = rowtotal(hea_ng_med_*_11)
				replace vdp_hea_ng_med_fac_11 = (vdp_hea_ng_med_fac_11 > 0)
				foreach v in hea_vet_hosp_11 hea_mh_clin_11 {
					rename `v' vdp_`v'
				}

				label variable vdp_hea_dr_ts_11 "Number of total doctor-facilities (total strength)"
				label variable vdp_hea_dr_ip_11 "Number of total doctor-facilities (in position)"
				label variable vdp_hea_ps_ts_11 "Number of total para staff-facilities (total strength)"
				label variable vdp_hea_ps_ip_11 "Number of total para staff-facilities (in position)"
				label variable vdp_hea_ng_med_fac_11 "Non-gov't medical facilities (Y/N)"
			}

			// Water variables
			{
				egen vdp_wat_all_year_11 = rowmax(wat_*_ay_11)
				label variable vdp_wat_all_year_11 "Water all year (Y/N)"
			}

			// Sanitation variables
			{
				foreach v of varlist san_drain_cl_11 san_drain_op_11 san_drain_none_11 {
					rename `v' vdp_`v'
				}
				egen vdp_san_com_toilet_11 = rowmax(san_com_toilet_*bath_11)
				rename san_com_waste_disp_11 vdp_san_waste_disp_hh_11
				// the source column flags "no waste disposal", so invert it
				generate vdp_san_com_waste_disp_11 = 1 - san_com_no_waste_disp_11

				label variable vdp_san_com_toilet_11 "Community toilet complex (Y/N)"
				label variable vdp_san_waste_disp_hh_11 "Community waste disposal, house-to-house collection (Y/N)"
				label variable vdp_san_com_waste_disp_11 "Community waste disposal system (Y/N)"
			}

			// Communication variables
			{
				foreach v in com_phone_mob_fac_11 com_rang_phone_mob_11 com_int_cafe_fac_11 com_rang_int_cafe_11 {
					rename `v' vdp_`v'
				}
			}

			// Transportation variables
			{
				foreach v of varlist tra_auto_fac_11 tra_taxi_fac_11 tra_van_fac_11 tra_tractor_fac_11 tra_carts_fac_11 ///
				                     tra_ferry_fac_11 tra_ntl_hwy_fac_11 tra_st_hwy_fac_11 tra_road_md_fac_11 tra_road_od_fac_11 ///
				                     tra_paved_road_fac_11 tra_grav_road_fac_11 tra_aw_road_fac_11 tra_footpath_fac_11 {
					rename `v' vdp_`v'
				}
				// 2001-only approach-road columns
				foreach kind in pr mr fp {
					rename tra_app_`kind'_01 vdpr_tra_app_`kind'_01
				}

				egen vdp_tra_rick_fac_11 = rowmax(tra_rickshaw_*_fac_11)
				label variable vdp_tra_rick_fac_11 "Cycle-pulled rickshaws, manual/machine (Y/N)"
			}

			// Financial variables
			{
				foreach v in fin_atm_fac_11 fin_rang_atm_11 {
					rename `v' vdp_`v'
				}
			}

			// Amenities variables
			{
				foreach v of varlist ame_shg_fac_11 ame_pub_dist_fac_11 ame_mandi_fac_11 ame_haat_fac_11 ame_am_soc_fac_11 ///
				                     ame_asha_fac_11 ame_comm_cntr_fac_11 ame_library_fac_11 ame_read_room_fac_11 ame_assemb_ps_fac_11 {
					rename `v' vdp_`v'
				}

				egen vdp_ame_nut_cntr_fac_11 = rowmax(ame_nut_cntr_*_fac_11)
				label variable vdp_ame_nut_cntr_fac_11 "Nutritional center (Y/N)"
			}

			// Power variables: whatever still carries the bare pwr_ prefix was not paired above
			{
				foreach v of varlist pwr_*_11 {
					rename `v' vdp_`v'
				}
				foreach v of varlist pwr_*_01 {
					rename `v' vdpr_`v'
				}
			}

			// Geographic variables: same treatment for the remaining geo_ columns
			{
				foreach v of varlist geo_area_*_11 {
					rename `v' vdp_`v'
				}
				foreach v of varlist geo_*_01 {
					rename `v' vdpr_`v'
				}
			}
}

  // Standardize names of dummy variables ("_d_" means dummy)
{
	// The same treatment is applied to each prefix family in turn. The group tag ends
	// 4 characters after the family prefix ("vd_edu" = 6, "vdpr_edu" = 8, "vdp_edu" = 7).
foreach fam in vd vdpr vdp {
	local cut = length("`fam'") + 4
	foreach v of varlist `fam'_* {
		local lbl : variable label `v'
		// a dummy is recognized by "Y/N" in its label or "_fac_" in its name
		if (regexm("`lbl'","Y/N") | regexm("`v'","_fac_")) {
			// must really be 0/1/missing, otherwise stop
			assert `v'==0 | `v'==1 | `v'==.
			local head = substr("`v'",1,`cut')
			local tail = subinstr(substr("`v'",`cut'+1,100),"_fac_","_",1)
			// insert "_d" after the group tag and drop the first "_fac_" marker
			rename `v' `head'_d`tail'
		}
	}
}
}

  // Standardize names of count variables ("_n_" means number)
{
	// The same treatment is applied to each prefix family in turn. The group tag ends
	// 4 characters after the family prefix ("vd_edu" = 6, "vdpr_edu" = 8, "vdp_edu" = 7).
foreach fam in vd vdpr vdp {
	local cut = length("`fam'") + 4
	foreach v of varlist `fam'_* {
		local lbl : variable label `v'
		// a count is recognized by "Number of" in its label
		if (regexm("`lbl'","Number of")) {
			// counts are whole and non-negative
			replace `v' = round(`v')
			replace `v' = 0 if `v'<0
			local head = substr("`v'",1,`cut')
			local tail = substr("`v'",`cut'+1,100)
			// insert "_n" after the group tag
			rename `v' `head'_n`tail'
		}
	}
}
}
  // Standardize hour-per-day variables ("_h_" means hour)
{
foreach v of varlist vdp_* {
	local lbl : variable label `v'
	// hour variables are recognized by "hours" in their label
	if (regexm("`lbl'","hours")) {
		// whole hours, clamped to the 0-24 range (missing stays missing)
		replace `v' = round(`v')
		replace `v' = 0 if `v'<0
		replace `v' = 24 if `v'>24 & `v'!=.
		local head = substr("`v'",1,7)
		local tail = subinstr(substr("`v'",8,100),"_hr_","_",1)
		// insert "_h" after the group tag and drop the first "_hr_" marker
		rename `v' `head'_h`tail'
	}
}
}
  // Standardize range variables ("_r_" means range)
  // Range codes: 0 = in village, 1 = <5km, 2 = 5-10km, 3 = >10km; anything else is set to missing.
{
	// The same treatment is applied to each prefix family in turn. The group tag ends
	// 4 characters after the family prefix ("vd_edu" = 6, "vdpr_edu" = 8, "vdp_edu" = 7).
foreach fam in vd vdpr vdp {
	local cut = length("`fam'") + 4
	foreach v of varlist `fam'_* {
		local label : variable label `v'
		if (regexm("`label'","range") | regexm("`v'","_rang_")) {
			replace `v' = . if inlist(`v',0,1,2,3)==0
			local prefix = substr("`v'",1,`cut')
			local suffix = subinstr(substr("`v'",`cut'+1,100),"_rang_","_",1)
			local newname = "`prefix'" + "_r" + "`suffix'"
			// names of the companion dummy / count for the same facility, if any
			local v_d = subinstr("`newname'","_r_","_d_",1)
			local v_n = subinstr("`newname'","_r_","_n_",1)
			// Reconcile the range with its companion; capture because the companion
			// may not exist. Facility present -> range 0; facility absent -> range
			// must be 1/2/3, otherwise missing.
			cap replace `v' = 0 if `v_d'==1
			cap replace `v' = . if `v_d'==0 & inlist(`v',1,2,3)==0
			// Missing compares greater than any number in Stata, so the count test
			// needs an explicit missing guard; otherwise an unknown count would be
			// treated as "facility present" and force the range to 0.
			cap replace `v' = 0 if `v_n'>0 & !missing(`v_n')
			cap replace `v' = . if `v_n'==0 & inlist(`v',1,2,3)==0
			rename `v' `newname'
		}
	}
}

}  
	
  // Standardize distance variables ("_k_" means distance in km)
{
foreach v of varlist vd_geo_dist_town* {
	// Letter codes become fixed km values (presumably bin midpoints -- confirm).
	// Each replace is captured because it fails when the variable is already numeric.
	local codes A B C
	local kms 2.5 7.5 12.5
	forvalues i = 1/3 {
		local code : word `i' of `codes'
		local km : word `i' of `kms'
		capture replace `v' = "`km'" if `v'=="`code'"
	}
	// anything non-numeric left over becomes missing
	destring `v', replace force
	replace `v' = . if `v'<0
	replace `v' = round(`v',0.0001)
	rename `v' `=subinstr("`v'","_dist_","_k_",1)'
}
}	
	// Standardize area variables ("_a_" means area)
{
// The same treatment is applied to each prefix family in turn. The group tag ends
// 4 characters after the family prefix ("vd_geo" = 6, "vdpr_geo" = 8, "vdp_geo" = 7).
foreach fam in vd vdpr vdp {
	local cut = length("`fam'") + 4
	foreach v of varlist `fam'_* {
		local lbl : variable label `v'
		// an area variable has "Area"/"area" in its label or "_area_" in its name
		if (regexm("`lbl'","[Aa]rea") | regexm("`v'","_area_")) {
			// negative areas are invalid; keep 4 decimal places
			replace `v' = . if `v'<0
			replace `v' = round(`v',0.0001)
			local head = substr("`v'",1,`cut')
			local tail = subinstr(substr("`v'",`cut'+1,100),"_area_","_",1)
			// insert "_a" after the group tag and drop the first "_area_" marker
			rename `v' `head'_a`tail'
		}
	}
}
	    // Some basic checks for internal consistency
{		
  /*
	egen temp1_01 = rowtotal(vd_geo_a_irr_oth_01 vd_geo_a_irr_wfall_01 vd_geo_a_irr_w_twell_01 vd_geo_a_irr_tank_lake_01 vd_geo_a_irr_canal_01) 
  egen temp1_11 = rowtotal(vd_geo_a_irr_oth_11 vd_geo_a_irr_wfall_11 vd_geo_a_irr_w_twell_11 vd_geo_a_irr_tank_lake_11 vd_geo_a_irr_canal_11) 
	assert abs(round(temp1_01) -( round(vd_geo_a_irr_01) - round(geo_river_irr_01) ))<2
	
	gen temp1_01_test = vd_geo_a_irr_01 - temp1_01 - geo_river_irr_01
	gen temp1_11_test = vd_geo_a_irr_11 - temp1_11
	assert round(temp1_11) == round(vd_geo_a_irr_11)
	
	egen temp2_01 = rowtotal( vd_geo_a_fores_01 vd_geo_a_irr_01 vd_geo_a_na_cu_01 vd_geo_a_unirr_01 vd_geo_a_cult_waste_01)
	gen temp2_01_test = vd_geo_a_01 - temp2_01

	egen temp2_11 = rowtotal( vd_geo_a_cult_waste_11 vd_geo_a_fores_11 vd_geo_a_na_cu_11 vdp_geo_a_non_ag_11 vdp_geo_a_treecrop vdp*fallow*11 vdp*pasture_11 vdp_geo*sown_11 )
	gen temp2_11_test = vd_geo_a_11 - temp2_11

	gen temp3_11 = vd_geo_a_irr_11 + vd_geo_a_unirr_11
	gen temp3_11_test = vdp_geo_a_sown_11-temp3_11
	
  egen temp4_11 = rowtotal( vd_geo_a_fores_11 vd_geo_a_irr_11 vd_geo_a_na_cu_11 vd_geo_a_unirr_11 vd_geo_a_cult_waste_11)	
	reg temp4_11 vd_geo_a_01, nocons
	drop temp*
	*/
}	

		
	
}	
	
	// Create crop dummies from agricultural commodity variables
{

// For each crop this section builds two 0/1 indicators from the free-text
// 2011 commodity fields:
//   vdp_agr_d_<crop>_11      crop named in any of agr_comm1_11-agr_comm3_11
//   vdp_agr_d_<crop>_pri_11  crop named in agr_comm1_11
// Both indicators are filled in ONE pass over the strings. Matched spellings
// are stripped from the commodity strings as they are found, so the primary
// indicator has to be set before agr_comm1_11 is stripped; building it in a
// later pass over agr_comm1_11 would only ever see the leftover text and
// would leave the primary dummies at 0 for almost every village.

local crops wheat corn rice millet cotton sugar sorghum grndnut coconut vegtbl arecanut banana barley potato pulse mustard sesame

// Spelling variants per crop, tried in the order listed. Order matters:
// each variant is removed from the string (first occurrence) after testing.
local crop_pat_wheat    `""WHEAT" "WHAT" "WHAET" "WEAT" "WHHEAT" "WHETA" "WTEAT" "WHET" "WQHEAT" "WHEAST" "WHEAS" "WHEET" "WHEEAT" "WHEHAT" "WHAEAT" "WGHEAT" "WAHEAT""'
local crop_pat_corn     `""CORN" "MAIZE" "MIZE" "MAZE" "MAIZ" "MIAZE" "MAIZA" "MAIIZE" "MAZIE""'
local crop_pat_rice     `""RICE" "RUCE" "PADDY" "PADY" "PADDDY" "PADDFY" "PADFDY" "PADDUY" "PEDDY" "PASDDY" "PAADY""'
local crop_pat_millet   `""BAJRA" "PEARLMILLET" "PEARL MILLET" "PEARLMILET" "MILLET" "PEARL" "RAGI" "MILLTES""'
local crop_pat_cotton   `""COTTON""'
local crop_pat_sugar    `""SUGARCANE" "SUGAR" "SHUGAR" "CANE" "SUGER""'
local crop_pat_sorghum  `""JOWAR" "JUWAR" "SORGHUM""'
local crop_pat_grndnut  `""GROUNDNUT" "GROUND NUT" "GROUNUT-NUT""'
local crop_pat_coconut  `""COCONUTS" "COCONUT""'
local crop_pat_vegtbl   `""VEGETABLES" "VEGETABLE" "VEGTABLES" "VEGITABLES" "VEGITABLE" "VGETABLES" "VEGETABL;ES" "VEGATABLES""'
local crop_pat_arecanut `""ARECANUT" "ARACANUT" "ARE CANUT" "ARECA NUT" "BETEL NUT" "BETEL- NUT" "BEETLE" "BETELNUT" "BETTEL NUT" "BEATEL NUT" "BEATTEL NUT" "BEATTLE NUT""'
local crop_pat_banana   `""BANANA" "BANANAA""'
local crop_pat_barley   `""BARLEY" "BARELY" "BARIEY""'
local crop_pat_potato   `""POTATOES" "POTATTO" "POTTATO" "POTATO" "PATATOTS" "PATATOS" "PATATOES" "PATATO" "PORATO" "POPTATO""'
local crop_pat_pulse    `""GRAM" "PULSES" "PULSE" "PULES" "PIGEON PEA" "PEAGON PEA" "PEAS" "BEANS" "BEAN" "CHANA" "CHANNA" "MUNG" "MOONG" "DAL" "DAAL" "DALS" "PULES""'
local crop_pat_mustard  `""MUSTARD" "MUSTARED" "MUSTRAD" "MUSTURD" "MUSTURED" "MUSTERED" "MUSTERD" "MASTURD""'
local crop_pat_sesame   `""SESAME" "SESAMUM" "SEASAME" "SEASHAME" "SESAMON""'

// Text that follows "commodity = " in each variable label
local crop_lbl_wheat    "wheat"
local crop_lbl_corn     "corn"
local crop_lbl_rice     "rice"
local crop_lbl_millet   "millet"
local crop_lbl_cotton   "cotton"
local crop_lbl_sugar    "sugar"
local crop_lbl_sorghum  "sorghum"
local crop_lbl_grndnut  "groundnut"
local crop_lbl_coconut  "coconut"
local crop_lbl_vegtbl   "vegetables"
local crop_lbl_arecanut "areca nut"
local crop_lbl_banana   "banana"
local crop_lbl_barley   "barley"
local crop_lbl_potato   "potato"
local crop_lbl_pulse    "pulses, lentils, gram, beans"
local crop_lbl_mustard  "mustard"
local crop_lbl_sesame   "sesame"

// Initialise indicators at 0; the missing flags are taken from the raw
// strings, before any stripping happens.
foreach c of local crops {
  gen vdp_agr_d_`c'_11 = 0
}
gen temp_missing = agr_comm1_11=="" & agr_comm2_11=="" & agr_comm3_11==""

foreach c of local crops {
  gen vdp_agr_d_`c'_pri_11 = 0
}
gen temp_missing_pri = agr_comm1_11==""

foreach v of varlist agr_comm1_11 agr_comm2_11 agr_comm3_11 {
  // Turn "," "/" "." into spaces, then collapse repeated blanks
  replace `v' = trim(itrim(subinstr(subinstr(subinstr(`v',","," ",.),"/"," ",.),"."," ",.)))

  // Only the first commodity field feeds the primary-crop indicators
  local is_primary = ("`v'" == "agr_comm1_11")

  foreach c of local crops {
    // Pulses are not flagged when the string also contains SOY
    local guard ""
    if "`c'" == "pulse" {
      local guard `"& regexm(`v',"SOY")==0"'
    }

    foreach x of local crop_pat_`c' {
      replace vdp_agr_d_`c'_11 = 1 if regexm(`v',"`x'") `guard'
      if `is_primary' {
        replace vdp_agr_d_`c'_pri_11 = 1 if regexm(`v',"`x'") `guard'
      }
      // Remove the matched spelling (first occurrence) so that shorter
      // variants later in the list do not re-match its remains
      replace `v' = subinstr(`v',"`x'","",1)
    }
  }

  replace `v' = trim(itrim(`v'))
}

// Indicators are missing, not 0, when the underlying text was blank
foreach v of varlist vdp_agr_d_*_11 {
  if regexm("`v'","_pri_") {
    replace `v' = . if temp_missing_pri==1
  }
  else {
    replace `v' = . if temp_missing==1
  }
}
drop temp_missing*

foreach c of local crops {
  la var vdp_agr_d_`c'_11 "Dummy for ag commodity = `crop_lbl_`c''"
}
foreach c of local crops {
  la var vdp_agr_d_`c'_pri_11 "Dummy for primary ag commodity = `crop_lbl_`c''"
}

}
	// Drop VD variables we aren't using
{
  // Free-text commodity fields (commodities will be useful later though)
  drop *man_comm* hand_comm* agr_comm*
  // 2001 irrigation-source area variables that are not kept
  drop vdpr_geo_a_canal_govt_01 vdpr_geo_a_canal_pvt_01 vdpr_geo_a_lake_irr_01 vdpr_geo_a_river_irr_01 vdpr_geo_a_tank_irr_01

  // Within the stored-order range vd_geo_a_01 ... vdp_ame_d_nut_cntr_11,
  // collect every variable whose name does not begin with "vd" and drop
  // them together.
  local not_vd
  foreach candidate of varlist vd_geo_a_01-vdp_ame_d_nut_cntr_11 {
    if substr("`candidate'",1,2)!="vd" {
      local not_vd `not_vd' `candidate'
    }
  }
  if "`not_vd'" != "" {
    drop `not_vd'
  }

  // Insert "irr_" into the names of the remaining 2001 tubewell/well variables
  foreach src in tw_w_el tw_wo_el well_w_el well_wo_el {
    rename vdpr_geo_a_`src'_01 vdpr_geo_a_irr_`src'_01
  }
}	


}


********************************************************************************
********************************************************************************

** Step 5: Collapse by pca01_id, our master identifier
{
 
  // Save variable labels for re-labeling post-collapse
{
// For every variable, keep its label in local macro l<varname>; a variable
// with no label gets its own name stored instead.
foreach name of varlist _all {
  local l`name' : variable label `name'
  if `"`l`name''"' == "" local l`name' "`name'"
}
}

  // Tag duplicates by dataset and reweight 2011 VD variables appropriately
{
// dup_<src> = number of OTHER rows sharing the same <src>_id (0 if unique)
foreach src in pca01 pca11 vd01 vd11 {
  duplicates tag `src'_id, generate(dup_`src')
}

// Diagnostic only: rows where the PCA11 and VD11 duplicate counts disagree
count if dup_pca11 != dup_vd11

// Split each 2011 VD area (_a_) and count (_n_) variable evenly across the
// rows that share a vd11_id, so that the (sum) in the collapses below adds
// back up to the original value.
foreach share of varlist vd*_???_a_*_11 vd*_???_n_*_11 {
  replace `share' = `share' / (dup_vd11 + 1)
}
}

  // First collapse by pca11_id (very few dupes here, and virtually all are also pca01_id dupes)
{
// Groups are defined by the 2001 PCA variables pca01_id through tot_f (a
// stored-order range) together with pca11_id, so rows that agree on all of
// those are merged into one.
// Aggregation by variable family:
//   min  - identifiers carried from the other datasets; VD _k_ / _r_ variables
//   mean - 2011 population counts, PCA percentage ranges, HPCA variables, VD _h_
//   sum  - VD _a_ / _n_ variables, and the dup_pca01 tag
//   max  - VD _d_ variables
// The 2011 VD _a_ / _n_ variables were already divided by (dup_vd11+1) above.
// NOTE(review): the 2001 VD _a_ / _n_ variables are summed here before the
// 2001 reweighting step that follows this collapse - confirm that groups
// with several rows sharing one vd01_id are not double counted.
// Variables not named in the collapse (e.g. dup_pca11, dup_vd01, dup_vd11)
// are not kept.
collapse (min) vd01_id conc_id v_id_hpca11 vd11_id /// identifiers from other datasets
				 (mean) no_hh11 tot_p11 tot_m11 tot_f11 /// 2011 population, etc
         (mean) pct_06_01-work_marg_ot_f_01 pct_06_11-work_marg_ot_f_11 /// PCA variables (all percentages)
				 (mean) hpca01_* hpca11_* /// HPCA variables (all percentages)
				 (sum) vd*_???_a_* vd*_???_n_* /// VD variables to sum (areas, counts)
				 (min) vd*_???_k_* vd*_???_r_* /// VD variables to take min (distance in km or range)
				 (max) vd*_???_d_* /// VD variables to take max (dummies)
				 (mean) vd*_???_h_* /// VD variables to take average (hours per day)
				 (sum) dup_pca01, by(pca01_id-tot_f pca11_id) fast
}				 

  // Retag VD 2001 duplicates, and reweight appropriately
{
// dup_vd01 is recomputed on the collapsed data: number of OTHER rows that
// share the same vd01_id (0 if unique)
duplicates tag vd01_id, generate(dup_vd01)

// Split each 2001 VD area (_a_) and count (_n_) variable evenly across the
// rows sharing a vd01_id before they are summed in the next collapse
foreach share of varlist vd*_???_a_*_01 vd*_???_n_*_01 {
  replace `share' = `share' / (dup_vd01 + 1)
}
}

  // Now collapse by pca01_id (here most dupes are villages that split between 2001-2011)
{
// Groups are defined by the 2001 PCA variables pca01_id through tot_f (a
// stored-order range); pca11_id is no longer a grouping variable and is
// reduced with (min) like the other carried identifiers.
// Aggregation by variable family is the same as in the first collapse:
//   min  - identifiers; VD _k_ / _r_ variables
//   mean - 2011 population counts, PCA percentage ranges, HPCA variables, VD _h_
//   sum  - VD _a_ / _n_ variables (reweighted above), and dup_pca01
//   max  - VD _d_ variables
// NOTE(review): no_hh11 / tot_p11 / tot_m11 / tot_f11 are averaged across the
// rows of a pca01_id. If those rows are the 2011 pieces of a village that
// split, the mean is the average piece rather than the combined 2011
// total - confirm this is intended.
collapse (min) vd01_id conc_id pca11_id v_id_hpca11 vd11_id /// identifiers from other datasets
				 (mean) no_hh11 tot_p11 tot_m11 tot_f11 /// 2011 population, etc
         (mean) pct_06_01-work_marg_ot_f_01 pct_06_11-work_marg_ot_f_11 /// PCA variables (all percentages)
				 (mean) hpca01_* hpca11_* /// HPCA variables (all percentages)
				 (sum) vd*_???_a_* vd*_???_n_* /// VD variables to sum (areas, counts)
				 (min) vd*_???_k_* vd*_???_r_* /// VD variables to take min (distance in km or range)
				 (max) vd*_???_d_* /// VD variables to take max (dummies)
				 (mean) vd*_???_h_* /// VD variables to take average (hours per day)
				 (sum) dup_pca01, by(pca01_id-tot_f) fast
}				 


  // Relabel collapsed variables with saved labels, compress, and save
{
// Re-apply the label text held in local macro l<varname> for each surviving
// variable. Compound double quotes are used so that a saved label which
// itself contains a double-quote character is passed through intact
// instead of terminating the string early.
foreach v of var * {
  label var `v' `"`l`v''"'
}
// dup_pca01 was created after the labels were saved, so it is labelled here
label var dup_pca01 "Count of PCA01 duplicates collapsed over to get unique dataset"

compress
save "$panel/census_panel_2001_2011.dta", replace

}
}

********************************************************************************
********************************************************************************
